/*
 * Copyright (c) Kumo Inc. and affiliates.
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// String type.

#pragma once

#include <algorithm>
#include <atomic>
#include <cassert>
#include <cstddef>
#include <cstring>
#include <iosfwd>
#include <limits>
#include <stdexcept>
#include <string>
#include <string_view>
#include <type_traits>
#include <utility>

#include <fmt/format.h>
#include <melon/cportability.h>
#include <melon/cpp_attributes.h>
#include <melon/likely.h>
#include <melon/portability.h>
#include <melon/traits.h>
#include <melon/hash/hash.h>
#include <melon/lang/assume.h>
#include <melon/lang/checked_math.h>
#include <melon/lang/exception.h>
#include <melon/memory/malloc.h>

#if MELON_CPLUSPLUS >= 202002L
#include <compare>
#endif

MELON_PUSH_WARNING
// Ignore shadowing warnings within this file, so includers can use -Wshadow.
MELON_GNU_DISABLE_WARNING("-Wshadow")

namespace melon {
    // When compiling with ASan, always heap-allocate the string even if
    // it would fit in-situ, so that ASan can detect access to the string
    // buffer after it has been invalidated (destroyed, resized, etc.).
    // Note that this flag doesn't remove support for in-situ strings, as
    // that would break ABI-compatibility and wouldn't allow linking code
    // compiled with this flag with code compiled without.
#ifdef MELON_SANITIZE_ADDRESS
#define KMSTRING_DISABLE_SSO true
#else
#define KMSTRING_DISABLE_SSO false
#endif

    namespace kmstring_detail {
        template<class InIt, class OutIt>
        inline std::pair<InIt, OutIt> copy_n(
            InIt b, typename std::iterator_traits<InIt>::difference_type n, OutIt d) {
            for (; n != 0; --n, ++b, ++d) {
                *d = *b;
            }
            return std::make_pair(b, d);
        }

        template<class Pod, class T>
        inline void podFill(Pod *b, Pod *e, T c) {
            assert(b && e && b <= e);
            constexpr auto kUseMemset = sizeof(T) == 1;
            if /* constexpr */ (kUseMemset) {
                memset(b, c, size_t(e - b));
            } else {
                auto const ee = b + ((e - b) & ~7u);
                for (; b != ee; b += 8) {
                    b[0] = c;
                    b[1] = c;
                    b[2] = c;
                    b[3] = c;
                    b[4] = c;
                    b[5] = c;
                    b[6] = c;
                    b[7] = c;
                }
                // Leftovers
                for (; b != e; ++b) {
                    *b = c;
                }
            }
        }

        /*
         * Lightly structured memcpy, simplifies copying PODs and introduces
         * some asserts. Unfortunately using this function may cause
         * measurable overhead (presumably because it adjusts from a begin/end
         * convention to a pointer/size convention, so it does some extra
         * arithmetic even though the caller might have done the inverse
         * adaptation outside).
         */
        template<class Pod>
        inline void podCopy(const Pod *b, const Pod *e, Pod *d) {
            assert(b != nullptr);
            assert(e != nullptr);
            assert(d != nullptr);
            assert(e >= b);
            assert(d >= e || d + (e - b) <= b);
            memcpy(d, b, (e - b) * sizeof(Pod));
        }

        /*
         * Lightly structured memmove, simplifies copying PODs and introduces
         * some asserts
         */
        template<class Pod>
        inline void podMove(const Pod *b, const Pod *e, Pod *d) {
            assert(e >= b);
            memmove(d, b, (e - b) * sizeof(*b));
        }
    } // namespace kmstring_detail

    /**
     * Defines a special acquisition method for constructing kmstring
     * objects. AcquireMallocatedString means that the user passes a
     * pointer to a malloc-allocated string that the kmstring object will
     * take into custody.
     */
    enum class AcquireMallocatedString {
    };

    /*
     * kmstring_core_model is a mock-up type that defines all required
     * signatures of a kmstring core. The kmstring class itself uses such
     * a core object to implement all of the numerous member functions
     * required by the standard.
     *
     * If you want to define a new core, copy the definition below and
     * implement the primitives. Then plug the core into basic_kmstring as
     * a template argument.

    template <class Char>
    class kmstring_core_model {
     public:
      kmstring_core_model();
      kmstring_core_model(const kmstring_core_model &);
      kmstring_core_model& operator=(const kmstring_core_model &) = delete;
      ~kmstring_core_model();
      // Returns a pointer to string's buffer (currently only contiguous
      // strings are supported). The pointer is guaranteed to be valid
      // until the next call to a non-const member function.
      const Char * data() const;
      // Much like data(), except the string is prepared to support
      // character-level changes. This call is a signal for
      // e.g. reference-counted implementation to fork the data. The
      // pointer is guaranteed to be valid until the next call to a
      // non-const member function.
      Char* mutableData();
      // Returns a pointer to string's buffer and guarantees that a
      // readable '\0' lies right after the buffer. The pointer is
      // guaranteed to be valid until the next call to a non-const member
      // function.
      const Char * c_str() const;
      // Shrinks the string by delta characters. Asserts that delta <=
      // size().
      void shrink(size_t delta);
      // Expands the string by delta characters (i.e. after this call
      // size() will report the old size() plus delta) but without
      // initializing the expanded region. The expanded region is
      // zero-terminated. Returns a pointer to the memory to be
      // initialized (the beginning of the expanded portion). The caller
      // is expected to fill the expanded area appropriately.
      // If expGrowth is true, exponential growth is guaranteed.
      // It is not guaranteed not to reallocate even if size() + delta <
      // capacity(), so all references to the buffer are invalidated.
      Char* expandNoinit(size_t delta, bool expGrowth);
      // Expands the string by one character and sets the last character
      // to c.
      void push_back(Char c);
      // Returns the string's size.
      size_t size() const;
      // Returns the string's capacity, i.e. maximum size that the string
      // can grow to without reallocation. Note that for reference counted
      // strings that's technically a lie - even assigning characters
      // within the existing size would cause a reallocation.
      size_t capacity() const;
      // Returns true if the data underlying the string is actually shared
      // across multiple strings (in a refcounted fashion).
      bool isShared() const;
      // Makes sure that at least minCapacity characters are available for
      // the string without reallocation. For reference-counted strings,
      // it should fork the data even if minCapacity < size().
      void reserve(size_t minCapacity);
    };
    */

    /**
     * This is the core of the string. The code should work on 32- and
     * 64-bit and both big- and little-endianan architectures with any
     * Char size.
     *
     * The storage is selected as follows (assuming we store one-byte
     * characters on a 64-bit machine): (a) "small" strings between 0 and
     * 23 chars are stored in-situ without allocation (the rightmost byte
     * stores the size); (b) "medium" strings from 24 through 254 chars
     * are stored in malloc-allocated memory that is copied eagerly; (c)
     * "large" strings of 255 chars and above are stored in a similar
     * structure as medium arrays, except that the string is
     * reference-counted and copied lazily. the reference count is
     * allocated right before the character array.
     *
     * The discriminator between these three strategies sits in two
     * bits of the rightmost char of the storage:
     * - If neither is set, then the string is small. Its length is represented by
     *   the lower-order bits on little-endian or the high-order bits on big-endian
     *   of that rightmost character. The value of these six bits is
     *   `maxSmallSize - size`, so this quantity must be subtracted from
     *   `maxSmallSize` to compute the `size` of the string (see `smallSize()`).
     *   This scheme ensures that when `size == `maxSmallSize`, the last byte in the
     *   storage is \0. This way, storage will be a null-terminated sequence of
     *   bytes, even if all 23 bytes of data are used on a 64-bit architecture.
     *   This enables `c_str()` and `data()` to simply return a pointer to the
     *   storage.
     *
     * - If the MSb is set, the string is medium width.
     *
     * - If the second MSb is set, then the string is large. On little-endian,
     *   these 2 bits are the 2 MSbs of MediumLarge::capacity_, while on
     *   big-endian, these 2 bits are the 2 LSbs. This keeps both little-endian
     *   and big-endian kmstring_core equivalent with merely different ops used
     *   to extract capacity/category.
     */
    template<class Char>
    class kmstring_core {
    public:
        kmstring_core() noexcept { reset(); }

        kmstring_core(const kmstring_core &rhs) {
            assert(&rhs != this);
            switch (rhs.category()) {
                case Category::isSmall:
                    copySmall(rhs);
                    break;
                case Category::isMedium:
                    copyMedium(rhs);
                    break;
                case Category::isLarge:
                    copyLarge(rhs);
                    break;
                default:
                    melon::assume_unreachable();
            }
            assert(size() == rhs.size());
            assert(memcmp(data(), rhs.data(), size() * sizeof(Char)) == 0);
        }

        kmstring_core &operator=(const kmstring_core &rhs) = delete;

        kmstring_core(kmstring_core &&goner) noexcept {
            // Take goner's guts
            ml_ = goner.ml_;
            // Clean goner's carcass
            goner.reset();
        }

        kmstring_core(
            const Char *const data,
            const size_t size,
            bool disableSSO = KMSTRING_DISABLE_SSO) {
            if (!disableSSO && size <= maxSmallSize) {
                initSmall(data, size);
            } else if (size <= maxMediumSize) {
                initMedium(data, size);
            } else {
                initLarge(data, size);
            }
            assert(this->size() == size);
            assert(size == 0 || memcmp(this->data(), data, size * sizeof(Char)) == 0);
        }

        ~kmstring_core() noexcept {
            if (category() == Category::isSmall) {
                return;
            }
            destroyMediumLarge();
        }

        // Snatches a previously mallocated string. The parameter "size"
        // is the size of the string, and the parameter "allocatedSize"
        // is the size of the mallocated block.  The string must be
        // \0-terminated, so allocatedSize >= size + 1 and data[size] == '\0'.
        //
        // So if you want a 2-character string, pass malloc(3) as "data",
        // pass 2 as "size", and pass 3 as "allocatedSize".
        kmstring_core(
            Char *const data,
            const size_t size,
            const size_t allocatedSize,
            AcquireMallocatedString) {
            if (size > 0) {
                assert(allocatedSize >= size + 1);
                assert(data[size] == '\0');
                // Use the medium string storage
                ml_.data_ = data;
                ml_.size_ = size;
                // Don't forget about null terminator
                ml_.setCapacity(allocatedSize - 1, Category::isMedium);
            } else {
                // No need for the memory
                free(data);
                reset();
            }
        }

        // swap below doesn't test whether &rhs == this (and instead
        // potentially does extra work) on the premise that the rarity of
        // that situation actually makes the check more expensive than is
        // worth.
        void swap(kmstring_core &rhs) {
            auto const t = ml_;
            ml_ = rhs.ml_;
            rhs.ml_ = t;
        }

        // In C++11 data() and c_str() are 100% equivalent.
        const Char *data() const { return c_str(); }

        Char *data() { return c_str(); }

        Char *mutableData() {
            switch (category()) {
                case Category::isSmall:
                    return small_;
                case Category::isMedium:
                    return ml_.data_;
                case Category::isLarge:
                    return mutableDataLarge();
            }
            melon::assume_unreachable();
        }

        const Char *c_str() const {
            const Char *ptr = ml_.data_;
            // With this syntax, GCC and Clang generate a CMOV instead of a branch.
            ptr = (category() == Category::isSmall) ? small_ : ptr;
            return ptr;
        }

        void shrink(const size_t delta) {
            if (category() == Category::isSmall) {
                shrinkSmall(delta);
            } else if (
                category() == Category::isMedium || RefCounted::refs(ml_.data_) == 1) {
                shrinkMedium(delta);
            } else {
                shrinkLarge(delta);
            }
        }

        MELON_NOINLINE
        void reserve(size_t minCapacity, bool disableSSO = KMSTRING_DISABLE_SSO) {
            MELON_PUSH_WARNING
            MELON_CLANG_DISABLE_WARNING("-Wcovered-switch-default")
            switch (category()) {
                case Category::isSmall:
                    reserveSmall(minCapacity, disableSSO);
                    break;
                case Category::isMedium:
                    reserveMedium(minCapacity);
                    break;
                case Category::isLarge:
                    reserveLarge(minCapacity);
                    break;
                default:
                    melon::assume_unreachable();
            }
            MELON_POP_WARNING
            assert(capacity() >= minCapacity);
        }

        Char *expandNoinit(
            const size_t delta,
            bool expGrowth = false,
            bool disableSSO = KMSTRING_DISABLE_SSO);

        void push_back(Char c) { *expandNoinit(1, /* expGrowth = */ true) = c; }

        size_t size() const {
            size_t ret = ml_.size_;
            if /* constexpr */ (kIsLittleEndian) {
                // We can save a couple instructions, because the category is
                // small iff the last char, as unsigned, is <= maxSmallSize.
                typedef typename std::make_unsigned<Char>::type UChar;
                auto maybeSmallSize = size_t(maxSmallSize) -
                                      size_t(static_cast<UChar>(small_[maxSmallSize]));
                // With this syntax, GCC and Clang generate a CMOV instead of a branch.
                ret =
                        (static_cast<ptrdiff_t>(maybeSmallSize) >= 0) ? maybeSmallSize : ret;
            } else {
                ret = (category() == Category::isSmall) ? smallSize() : ret;
            }
            return ret;
        }

        size_t capacity() const {
            MELON_PUSH_WARNING
            MELON_CLANG_DISABLE_WARNING("-Wcovered-switch-default")
            switch (category()) {
                case Category::isSmall:
                    return maxSmallSize;
                case Category::isLarge:
                    // For large-sized strings, a multi-referenced chunk has no
                    // available capacity. This is because any attempt to append
                    // data would trigger a new allocation.
                    if (RefCounted::refs(ml_.data_) > 1) {
                        return ml_.size_;
                    }
                    break;
                case Category::isMedium:
                default:
                    break;
            }
            MELON_POP_WARNING
            return ml_.capacity();
        }

        bool isShared() const {
            return category() == Category::isLarge && RefCounted::refs(ml_.data_) > 1;
        }

    private:
        Char *c_str() {
            Char *ptr = ml_.data_;
            // With this syntax, GCC and Clang generate a CMOV instead of a branch.
            ptr = (category() == Category::isSmall) ? small_ : ptr;
            return ptr;
        }

        void reset() { setSmallSize(0); }

        MELON_NOINLINE void destroyMediumLarge() noexcept {
            auto const c = category();
            assert(c != Category::isSmall);
            if (c == Category::isMedium) {
                free(ml_.data_);
            } else {
                RefCounted::decrementRefs(ml_.data_);
            }
        }

        struct RefCounted {
            std::atomic<size_t> refCount_;
            Char data_[1];

            constexpr static size_t getDataOffset() {
                return offsetof(RefCounted, data_);
            }

            static RefCounted *fromData(Char *p) {
                return static_cast<RefCounted *>(static_cast<void *>(
                    static_cast<unsigned char *>(static_cast<void *>(p)) -
                    getDataOffset()));
            }

            static size_t refs(Char *p) {
                return fromData(p)->refCount_.load(std::memory_order_acquire);
            }

            static void incrementRefs(Char *p) {
                fromData(p)->refCount_.fetch_add(1, std::memory_order_acq_rel);
            }

            static void decrementRefs(Char *p) {
                auto const dis = fromData(p);
                size_t oldcnt = dis->refCount_.fetch_sub(1, std::memory_order_acq_rel);
                assert(oldcnt > 0);
                if (oldcnt == 1) {
                    ::free(dis);
                }
            }

            static RefCounted *create(size_t *size) {
                size_t capacityBytes;
                if (!melon::checked_add(&capacityBytes, *size, size_t(1))) {
                    throw_exception(std::length_error(""));
                }
                if (!melon::checked_muladd(
                    &capacityBytes, capacityBytes, sizeof(Char), getDataOffset())) {
                    throw_exception(std::length_error(""));
                }
                const size_t allocSize = goodMallocSize(capacityBytes);
                auto result = static_cast<RefCounted *>(checkedMalloc(allocSize));
                result->refCount_.store(1, std::memory_order_release);
                *size = (allocSize - getDataOffset()) / sizeof(Char) - 1;
                return result;
            }

            static RefCounted *create(const Char *data, size_t *size) {
                const size_t effectiveSize = *size;
                auto result = create(size);
                if (MELON_LIKELY(effectiveSize > 0)) {
                    kmstring_detail::podCopy(data, data + effectiveSize, result->data_);
                }
                return result;
            }

            static RefCounted *reallocate(
                Char *const data,
                const size_t currentSize,
                const size_t currentCapacity,
                size_t *newCapacity) {
                assert(*newCapacity > 0 && *newCapacity > currentSize);
                size_t capacityBytes;
                if (!melon::checked_add(&capacityBytes, *newCapacity, size_t(1))) {
                    throw_exception(std::length_error(""));
                }
                if (!melon::checked_muladd(
                    &capacityBytes, capacityBytes, sizeof(Char), getDataOffset())) {
                    throw_exception(std::length_error(""));
                }
                const size_t allocNewCapacity = goodMallocSize(capacityBytes);
                auto const dis = fromData(data);
                assert(dis->refCount_.load(std::memory_order_acquire) == 1);
                auto result = static_cast<RefCounted *>(smartRealloc(
                    dis,
                    getDataOffset() + (currentSize + 1) * sizeof(Char),
                    getDataOffset() + (currentCapacity + 1) * sizeof(Char),
                    allocNewCapacity));
                assert(result->refCount_.load(std::memory_order_acquire) == 1);
                *newCapacity = (allocNewCapacity - getDataOffset()) / sizeof(Char) - 1;
                return result;
            }
        };

        typedef uint8_t category_type;

        enum class Category : category_type {
            isSmall = 0,
            isMedium = kIsLittleEndian ? 0x80 : 0x2,
            isLarge = kIsLittleEndian ? 0x40 : 0x1,
        };

        Category category() const {
            // works for both big-endian and little-endian
            return static_cast<Category>(bytes_[lastChar] & categoryExtractMask);
        }

        struct MediumLarge {
            Char *data_;
            size_t size_;
            size_t capacity_;

            size_t capacity() const {
                return kIsLittleEndian ? capacity_ & capacityExtractMask : capacity_ >> 2;
            }

            void setCapacity(size_t cap, Category cat) {
                capacity_ = kIsLittleEndian
                                ? cap | (static_cast<size_t>(cat) << kCategoryShift)
                                : (cap << 2) | static_cast<size_t>(cat);
            }
        };

        union {
            uint8_t bytes_[sizeof(MediumLarge)]; // For accessing the last byte.
            Char small_[sizeof(MediumLarge) / sizeof(Char)];
            MediumLarge ml_;
        };

        constexpr static size_t lastChar = sizeof(MediumLarge) - 1;
        constexpr static size_t maxSmallSize = lastChar / sizeof(Char);
        constexpr static size_t maxMediumSize = 254 / sizeof(Char);
        constexpr static uint8_t categoryExtractMask = kIsLittleEndian ? 0xC0 : 0x3;
        constexpr static size_t kCategoryShift = (sizeof(size_t) - 1) * 8;
        constexpr static size_t capacityExtractMask = kIsLittleEndian
                                                          ? ~(size_t(categoryExtractMask) << kCategoryShift)
                                                          : 0x0 /* unused */;

        static_assert(
            !(sizeof(MediumLarge) % sizeof(Char)),
            "Corrupt memory layout for kmstring.");

        size_t smallSize() const {
            assert(category() == Category::isSmall);
            constexpr auto shift = kIsLittleEndian ? 0 : 2;
            auto smallShifted = static_cast<size_t>(small_[maxSmallSize]) >> shift;
            assert(static_cast<size_t>(maxSmallSize) >= smallShifted);
            return static_cast<size_t>(maxSmallSize) - smallShifted;
        }

        void setSmallSize(size_t s) {
            // Warning: this should work with uninitialized strings too,
            // so don't assume anything about the previous value of
            // small_[maxSmallSize].
            assert(s <= maxSmallSize);
            constexpr auto shift = kIsLittleEndian ? 0 : 2;
            small_[maxSmallSize] = char((maxSmallSize - s) << shift);
            small_[s] = '\0';
            assert(category() == Category::isSmall && size() == s);
        }

        void copySmall(const kmstring_core &);

        void copyMedium(const kmstring_core &);

        void copyLarge(const kmstring_core &);

        void initSmall(const Char *data, size_t size);

        void initMedium(const Char *data, size_t size);

        void initLarge(const Char *data, size_t size);

        void reserveSmall(size_t minCapacity, bool disableSSO);

        void reserveMedium(size_t minCapacity);

        void reserveLarge(size_t minCapacity);

        void shrinkSmall(size_t delta);

        void shrinkMedium(size_t delta);

        void shrinkLarge(size_t delta);

        void unshare(size_t minCapacity = 0);

        Char *mutableDataLarge();
    };

    template<class Char>
    inline void kmstring_core<Char>::copySmall(const kmstring_core &rhs) {
        static_assert(offsetof(MediumLarge, data_) == 0, "kmstring layout failure");
        static_assert(
            offsetof(MediumLarge, size_) == sizeof(ml_.data_),
            "kmstring layout failure");
        static_assert(
            offsetof(MediumLarge, capacity_) == 2 * sizeof(ml_.data_),
            "kmstring layout failure");
        // Just write the whole thing, don't look at details. In
        // particular we need to copy capacity anyway because we want
        // to set the size (don't forget that the last character,
        // which stores a short string's length, is shared with the
        // ml_.capacity field).
        ml_ = rhs.ml_;
        assert(category() == Category::isSmall && this->size() == rhs.size());
    }

    template<class Char>
    MELON_NOINLINE void kmstring_core<Char>::copyMedium(const kmstring_core &rhs) {
        // Medium strings are copied eagerly. Don't forget to allocate
        // one extra Char for the null terminator.
        auto const allocSize = goodMallocSize((1 + rhs.ml_.size_) * sizeof(Char));
        ml_.data_ = static_cast<Char *>(checkedMalloc(allocSize));
        // Also copies terminator.
        kmstring_detail::podCopy(
            rhs.ml_.data_, rhs.ml_.data_ + rhs.ml_.size_ + 1, ml_.data_);
        ml_.size_ = rhs.ml_.size_;
        ml_.setCapacity(allocSize / sizeof(Char) - 1, Category::isMedium);
        assert(category() == Category::isMedium);
    }

    template<class Char>
    MELON_NOINLINE void kmstring_core<Char>::copyLarge(const kmstring_core &rhs) {
        // Large strings are just refcounted
        ml_ = rhs.ml_;
        RefCounted::incrementRefs(ml_.data_);
        assert(category() == Category::isLarge && size() == rhs.size());
    }

    // Small strings are bitblitted
    template<class Char>
    inline void kmstring_core<Char>::initSmall(
        const Char *const data, const size_t size) {
        // Layout is: Char* data_, size_t size_, size_t capacity_
        static_assert(
            sizeof(*this) == sizeof(Char *) + 2 * sizeof(size_t),
            "kmstring has unexpected size");
        static_assert(
            sizeof(Char *) == sizeof(size_t), "kmstring size assumption violation");
        // sizeof(size_t) must be a power of 2
        static_assert(
            (sizeof(size_t) & (sizeof(size_t) - 1)) == 0,
            "kmstring size assumption violation");

        // If data is aligned, use fast word-wise copying. Otherwise,
        // use conservative memcpy.
        // The word-wise path reads bytes which are outside the range of
        // the string, and makes ASan unhappy, so we disable it when
        // compiling with ASan.
#ifndef MELON_SANITIZE_ADDRESS
        if ((reinterpret_cast<size_t>(data) & (sizeof(size_t) - 1)) == 0) {
            const size_t byteSize = size * sizeof(Char);
            constexpr size_t wordWidth = sizeof(size_t);
            switch ((byteSize + wordWidth - 1) / wordWidth) {
                // Number of words.
                case 3:
                    ml_.capacity_ = reinterpret_cast<const size_t *>(data)[2];
                    [[fallthrough]];
                case 2:
                    ml_.size_ = reinterpret_cast<const size_t *>(data)[1];
                    [[fallthrough]];
                case 1:
                    ml_.data_ = *reinterpret_cast<Char **>(const_cast<Char *>(data));
                    [[fallthrough]];
                case 0:
                    break;
            }
        } else
#endif
        {
            if (size != 0) {
                kmstring_detail::podCopy(data, data + size, small_);
            }
        }
        setSmallSize(size);
    }

    template<class Char>
    MELON_NOINLINE void kmstring_core<Char>::initMedium(
        const Char *const data, const size_t size) {
        // Medium strings are allocated normally. Don't forget to
        // allocate one extra Char for the terminating null.
        auto const allocSize = goodMallocSize((1 + size) * sizeof(Char));
        ml_.data_ = static_cast<Char *>(checkedMalloc(allocSize));
        if (MELON_LIKELY(size > 0)) {
            kmstring_detail::podCopy(data, data + size, ml_.data_);
        }
        ml_.size_ = size;
        ml_.setCapacity(allocSize / sizeof(Char) - 1, Category::isMedium);
        ml_.data_[size] = '\0';
    }

    template<class Char>
    MELON_NOINLINE void kmstring_core<Char>::initLarge(
        const Char *const data, const size_t size) {
        // Large strings are allocated differently
        size_t effectiveCapacity = size;
        auto const newRC = RefCounted::create(data, &effectiveCapacity);
        ml_.data_ = newRC->data_;
        ml_.size_ = size;
        ml_.setCapacity(effectiveCapacity, Category::isLarge);
        ml_.data_[size] = '\0';
    }

    template<class Char>
    MELON_NOINLINE void kmstring_core<Char>::unshare(size_t minCapacity) {
        assert(category() == Category::isLarge);
        size_t effectiveCapacity = std::max(minCapacity, ml_.capacity());
        auto const newRC = RefCounted::create(&effectiveCapacity);
        // If this fails, someone placed the wrong capacity in an
        // kmstring.
        assert(effectiveCapacity >= ml_.capacity());
        // Also copies terminator.
        kmstring_detail::podCopy(ml_.data_, ml_.data_ + ml_.size_ + 1, newRC->data_);
        RefCounted::decrementRefs(ml_.data_);
        ml_.data_ = newRC->data_;
        ml_.setCapacity(effectiveCapacity, Category::isLarge);
        // size_ remains unchanged.
    }

    template<class Char>
    inline Char *kmstring_core<Char>::mutableDataLarge() {
        assert(category() == Category::isLarge);
        if (RefCounted::refs(ml_.data_) > 1) {
            // Ensure unique.
            unshare();
        }
        return ml_.data_;
    }

    template<class Char>
    MELON_NOINLINE void kmstring_core<Char>::reserveLarge(size_t minCapacity) {
        assert(category() == Category::isLarge);
        if (RefCounted::refs(ml_.data_) > 1) {
            // Ensure unique
            // We must make it unique regardless; in-place reallocation is
            // useless if the string is shared. In order to not surprise
            // people, reserve the new block at current capacity or
            // more. That way, a string's capacity never shrinks after a
            // call to reserve.
            unshare(minCapacity);
        } else {
            // String is not shared, so let's try to realloc (if needed)
            if (minCapacity > ml_.capacity()) {
                // Asking for more memory
                auto const newRC = RefCounted::reallocate(
                    ml_.data_, ml_.size_, ml_.capacity(), &minCapacity);
                ml_.data_ = newRC->data_;
                ml_.setCapacity(minCapacity, Category::isLarge);
            }
            assert(capacity() >= minCapacity);
        }
    }

    template<class Char>
    MELON_NOINLINE void kmstring_core<Char>::reserveMedium(
        const size_t minCapacity) {
        assert(category() == Category::isMedium);
        // String is not shared
        if (minCapacity <= ml_.capacity()) {
            return; // nothing to do, there's enough room
        }
        if (minCapacity <= maxMediumSize) {
            // Keep the string at medium size. Don't forget to allocate
            // one extra Char for the terminating null.
            size_t capacityBytes = goodMallocSize((1 + minCapacity) * sizeof(Char));
            // Also copies terminator.
            ml_.data_ = static_cast<Char *>(smartRealloc(
                ml_.data_,
                (ml_.size_ + 1) * sizeof(Char),
                (ml_.capacity() + 1) * sizeof(Char),
                capacityBytes));
            ml_.setCapacity(capacityBytes / sizeof(Char) - 1, Category::isMedium);
        } else {
            // Conversion from medium to large string
            kmstring_core nascent;
            // Will recurse to another branch of this function
            nascent.reserve(minCapacity);
            nascent.ml_.size_ = ml_.size_;
            // Also copies terminator.
            kmstring_detail::podCopy(
                ml_.data_, ml_.data_ + ml_.size_ + 1, nascent.ml_.data_);
            nascent.swap(*this);
            assert(capacity() >= minCapacity);
        }
    }

    template<class Char>
    MELON_NOINLINE void kmstring_core<Char>::reserveSmall(
        size_t minCapacity, const bool disableSSO) {
        assert(category() == Category::isSmall);
        if (!disableSSO && minCapacity <= maxSmallSize) {
            // small
            // Nothing to do, everything stays put
        } else if (minCapacity <= maxMediumSize) {
            // medium
            // Don't forget to allocate one extra Char for the terminating null
            auto const allocSizeBytes =
                    goodMallocSize((1 + minCapacity) * sizeof(Char));
            auto const pData = static_cast<Char *>(checkedMalloc(allocSizeBytes));
            auto const size = smallSize();
            // Also copies terminator.
            kmstring_detail::podCopy(small_, small_ + size + 1, pData);
            ml_.data_ = pData;
            ml_.size_ = size;
            ml_.setCapacity(allocSizeBytes / sizeof(Char) - 1, Category::isMedium);
        } else {
            // large
            auto const newRC = RefCounted::create(&minCapacity);
            auto const size = smallSize();
            // Also copies terminator.
            kmstring_detail::podCopy(small_, small_ + size + 1, newRC->data_);
            ml_.data_ = newRC->data_;
            ml_.size_ = size;
            ml_.setCapacity(minCapacity, Category::isLarge);
            assert(capacity() >= minCapacity);
        }
    }

    template<class Char>
    inline Char *kmstring_core<Char>::expandNoinit(
        const size_t delta,
        bool expGrowth, /* = false */
        bool disableSSO /* = KMSTRING_DISABLE_SSO */) {
        // Strategy is simple: make room, then change size
        assert(capacity() >= size());
        size_t sz, newSz;
        if (category() == Category::isSmall) {
            sz = smallSize();
            newSz = sz + delta;
            if (!disableSSO && MELON_LIKELY(newSz <= maxSmallSize)) {
                setSmallSize(newSz);
                return small_ + sz;
            }
            reserveSmall(
                expGrowth ? std::max(newSz, 2 * maxSmallSize) : newSz, disableSSO);
        } else {
            sz = ml_.size_;
            newSz = sz + delta;
            if (MELON_UNLIKELY(newSz > capacity())) {
                // ensures not shared
                reserve(expGrowth ? std::max(newSz, 1 + capacity() * 3 / 2) : newSz);
            }
        }
        assert(capacity() >= newSz);
        // Category can't be small - we took care of that above
        assert(category() == Category::isMedium || category() == Category::isLarge);
        ml_.size_ = newSz;
        ml_.data_[newSz] = '\0';
        assert(size() == newSz);
        return ml_.data_ + sz;
    }

    template<class Char>
    inline void kmstring_core<Char>::shrinkSmall(const size_t delta) {
        // Check for underflow
        assert(delta <= smallSize());
        setSmallSize(smallSize() - delta);
    }

    template<class Char>
    inline void kmstring_core<Char>::shrinkMedium(const size_t delta) {
        // Medium strings and unique large strings need no special
        // handling.
        assert(ml_.size_ >= delta);
        ml_.size_ -= delta;
        ml_.data_[ml_.size_] = '\0';
    }

    template<class Char>
    inline void kmstring_core<Char>::shrinkLarge(const size_t delta) {
        assert(ml_.size_ >= delta);
        // Shared large string, must make unique. This is because of the
        // durn terminator must be written, which may trample the shared
        // data.
        if (delta) {
            kmstring_core(ml_.data_, ml_.size_ - delta).swap(*this);
        }
        // No need to write the terminator.
    }

    /**
     * Dummy kmstring core that uses an actual std::string. This doesn't
     * make any sense - it's just for testing purposes.
     */
    template<class Char>
    class dummy_kmstring_core {
    public:
        dummy_kmstring_core() {
        }

        dummy_kmstring_core(const dummy_kmstring_core &another)
            : backend_(another.backend_) {
        }

        dummy_kmstring_core(const Char *s, size_t n) : backend_(s, n) {
        }

        void swap(dummy_kmstring_core &rhs) { backend_.swap(rhs.backend_); }
        const Char *data() const { return backend_.data(); }
        Char *mutableData() { return const_cast<Char *>(backend_.data()); }

        void shrink(size_t delta) {
            assert(delta <= size());
            backend_.resize(size() - delta);
        }

        Char *expandNoinit(size_t delta) {
            auto const sz = size();
            backend_.resize(size() + delta);
            return backend_.data() + sz;
        }

        void push_back(Char c) { backend_.push_back(c); }
        size_t size() const { return backend_.size(); }
        size_t capacity() const { return backend_.capacity(); }
        bool isShared() const { return false; }
        void reserve(size_t minCapacity) { backend_.reserve(minCapacity); }

    private:
        std::basic_string<Char> backend_;
    };

    /**
     * This is the basic_string replacement. For conformity,
     * basic_kmstring takes the same template parameters, plus the last
     * one which is the core.
     */
    template<
        typename E,
        class T = std::char_traits<E>,
        class A = std::allocator<E>,
        class Storage = kmstring_core<E> >
    class basic_kmstring {
        static_assert(
            std::is_same<A, std::allocator<E> >::value,
            "kmstring ignores custom allocators");

        template<typename Ex, typename... Args>
        MELON_ALWAYS_INLINE static void enforce(bool condition, Args &&... args) {
            if (!condition) {
                throw_exception<Ex>(static_cast<Args &&>(args)...);
            }
        }

        bool isSane() const {
            return begin() <= end() && empty() == (size() == 0) &&
                   empty() == (begin() == end()) && size() <= max_size() &&
                   capacity() <= max_size() && size() <= capacity() &&
                   begin()[size()] == '\0';
        }

        struct Invariant {
            Invariant &operator=(const Invariant &) = delete;

            explicit Invariant(const basic_kmstring &s) noexcept : s_(s) {
                assert(s_.isSane());
            }

            ~Invariant() noexcept { assert(s_.isSane()); }

        private:
            const basic_kmstring &s_;
        };

    public:
        // types
        typedef T traits_type;
        typedef typename traits_type::char_type value_type;
        typedef A allocator_type;
        typedef typename std::allocator_traits<A>::size_type size_type;
        typedef typename std::allocator_traits<A>::difference_type difference_type;

        typedef typename std::allocator_traits<A>::value_type &reference;
        typedef typename std::allocator_traits<A>::value_type const &const_reference;
        typedef typename std::allocator_traits<A>::pointer pointer;
        typedef typename std::allocator_traits<A>::const_pointer const_pointer;

        typedef E *iterator;
        typedef const E *const_iterator;
        typedef std::reverse_iterator<iterator> reverse_iterator;
        typedef std::reverse_iterator<const_iterator> const_reverse_iterator;

        static constexpr size_type npos = size_type(-1);
        typedef std::true_type IsRelocatable;

    private:
        using string_view_type = std::basic_string_view<value_type, traits_type>;

        static void procrustes(size_type &n, size_type nmax) {
            if (n > nmax) {
                n = nmax;
            }
        }

        static size_type traitsLength(const value_type *s);

        struct string_view_ctor {
        };

        MELON_NOINLINE basic_kmstring(
            string_view_type view, const A &, string_view_ctor)
            : store_(view.data(), view.size()) {
        }

    public:
        // C++11 21.4.2 construct/copy/destroy

        // Note: while the following two constructors can be (and previously were)
        // collapsed into one constructor written this way:
        //
        //   explicit basic_kmstring(const A& a = A()) noexcept { }
        //
        // This can cause Clang (at least version 3.7) to fail with the error:
        //   "chosen constructor is explicit in copy-initialization ...
        //   in implicit initialization of field '(x)' with omitted initializer"
        //
        // if used in a struct which is default-initialized.  Hence the split into
        // these two separate constructors.

        basic_kmstring() noexcept : basic_kmstring(A()) {
        }

        /* implicit */
        basic_kmstring(std::nullptr_t) = delete;

        explicit basic_kmstring(const A &) noexcept {
        }

        basic_kmstring(const basic_kmstring &str) : store_(str.store_) {
        }

        // Move constructor
        basic_kmstring(basic_kmstring &&goner) noexcept
            : store_(std::move(goner.store_)) {
        }

        // This is defined for compatibility with std::string
        template<typename A2>
        /* implicit */ basic_kmstring(const std::basic_string<E, T, A2> &str)
            : store_(str.data(), str.size()) {
        }

        basic_kmstring(
            const basic_kmstring &str,
            size_type pos,
            size_type n = npos,
            const A & /* a */ = A()) {
            assign(str, pos, n);
        }

        MELON_NOINLINE
        /* implicit */basic_kmstring(const value_type *s, const A & /*a*/ = A())
            : store_(s, traitsLength(s)) {
        }

        MELON_NOINLINE
        basic_kmstring(const value_type *s, size_type n, const A & /*a*/ = A())
            : store_(s, n) {
        }

        MELON_NOINLINE
        basic_kmstring(size_type n, value_type c, const A & /*a*/ = A()) {
            auto const pData = store_.expandNoinit(n);
            kmstring_detail::podFill(pData, pData + n, c);
        }

        template<class InIt>
        MELON_NOINLINE basic_kmstring(
            InIt begin,
            InIt end,
            typename std::enable_if<
                !std::is_same<InIt, value_type *>::value,
                const A>::type & /*a*/
                          = A()) {
            assign(begin, end);
        }

        // Specialization for const char*, const char*
        MELON_NOINLINE
        basic_kmstring(const value_type *b, const value_type *e, const A & /*a*/ = A())
            : store_(b, size_type(e - b)) {
        }

        // Nonstandard constructor
        basic_kmstring(
            value_type *s, size_type n, size_type c, AcquireMallocatedString a)
            : store_(s, n, c, a) {
        }

        // Construction from initialization list
        MELON_NOINLINE
        basic_kmstring(std::initializer_list<value_type> il) {
            assign(il.begin(), il.end());
        }

        template<
            typename StringViewLike,
            std::enable_if_t<
                std::is_convertible_v<const StringViewLike &, string_view_type> &&
                !std::is_convertible_v<const StringViewLike &, const value_type *>,
                int>  = 0>
        explicit basic_kmstring(const StringViewLike &view, const A &a = A())
            : basic_kmstring(string_view_type(view), a, string_view_ctor{}) {
        }

        template<
            typename StringViewLike,
            std::enable_if_t<
                std::is_convertible_v<const StringViewLike &, string_view_type> &&
                !std::is_convertible_v<const StringViewLike &, const value_type *>,
                int>  = 0>
        basic_kmstring(
            const StringViewLike &view, size_type pos, size_type n, const A &a = A())
            : basic_kmstring(
                string_view_type(view).substr(pos, n), a, string_view_ctor{}) {
        }

        ~basic_kmstring() noexcept {
        }

        basic_kmstring &operator=(const basic_kmstring &lhs);

        // Move assignment
        basic_kmstring &operator=(basic_kmstring &&goner) noexcept;

        // Compatibility with std::string
        template<typename A2>
        basic_kmstring &operator=(const std::basic_string<E, T, A2> &rhs) {
            return assign(rhs.data(), rhs.size());
        }

        // Compatibility with std::string
        std::basic_string<E, T, A> toStdString() const {
            return std::basic_string<E, T, A>(data(), size());
        }

        basic_kmstring &operator=(std::nullptr_t) = delete;

        basic_kmstring &operator=(const value_type *s) { return assign(s); }

        basic_kmstring &operator=(value_type c);

        // This actually goes directly against the C++ spec, but the
        // value_type overload is dangerous, so we're explicitly deleting
        // any overloads of operator= that could implicitly convert to
        // value_type.
        // Note that we do need to explicitly specify the template types because
        // otherwise MSVC 2017 will aggressively pre-resolve value_type to
        // traits_type::char_type, which won't compare as equal when determining
        // which overload the implementation is referring to.
        template<typename TP>
        typename std::enable_if<
            std::is_convertible<
                TP,
                typename basic_kmstring<E, T, A, Storage>::value_type>::value &&
            !std::is_same<
                typename std::decay<TP>::type,
                typename basic_kmstring<E, T, A, Storage>::value_type>::value,
            basic_kmstring<E, T, A, Storage> &>::type
        operator=(TP c) = delete;

        basic_kmstring &operator=(std::initializer_list<value_type> il) {
            return assign(il.begin(), il.end());
        }

        operator string_view_type() const noexcept { return {data(), size()}; }

        // C++11 21.4.3 iterators:
        iterator begin() { return store_.mutableData(); }

        const_iterator begin() const { return store_.data(); }

        const_iterator cbegin() const { return begin(); }

        iterator end() { return store_.mutableData() + store_.size(); }

        const_iterator end() const { return store_.data() + store_.size(); }

        const_iterator cend() const { return end(); }

        reverse_iterator rbegin() { return reverse_iterator(end()); }

        const_reverse_iterator rbegin() const {
            return const_reverse_iterator(end());
        }

        const_reverse_iterator crbegin() const { return rbegin(); }

        reverse_iterator rend() { return reverse_iterator(begin()); }

        const_reverse_iterator rend() const {
            return const_reverse_iterator(begin());
        }

        const_reverse_iterator crend() const { return rend(); }

        // Added by C++11
        // C++11 21.4.5, element access:
        const value_type &front() const { return *begin(); }

        const value_type &back() const {
            assert(!empty());
            // Should be begin()[size() - 1], but that branches twice
            return *(end() - 1);
        }

        value_type &front() { return *begin(); }

        value_type &back() {
            assert(!empty());
            // Should be begin()[size() - 1], but that branches twice
            return *(end() - 1);
        }

        void pop_back() {
            assert(!empty());
            store_.shrink(1);
        }

        // C++11 21.4.4 capacity:
        size_type size() const { return store_.size(); }

        size_type length() const { return size(); }

        size_type max_size() const { return std::numeric_limits<size_type>::max(); }

        void resize(size_type n, value_type c = value_type());

        size_type capacity() const { return store_.capacity(); }

        void reserve(size_type res_arg = 0) {
            enforce<std::length_error>(res_arg <= max_size(), "");
            store_.reserve(res_arg);
        }

        void shrink_to_fit() {
            // Shrink only if slack memory is sufficiently large
            if (capacity() < size() * 3 / 2) {
                return;
            }
            basic_kmstring(cbegin(), cend()).swap(*this);
        }

        void clear() { resize(0); }

        bool empty() const { return size() == 0; }

        // C++11 21.4.5 element access:
        const_reference operator[](size_type pos) const { return *(begin() + pos); }

        reference operator[](size_type pos) { return *(begin() + pos); }

        const_reference at(size_type n) const {
            enforce<std::out_of_range>(n < size(), "");
            return (*this)[n];
        }

        reference at(size_type n) {
            enforce<std::out_of_range>(n < size(), "");
            return (*this)[n];
        }

        // C++11 21.4.6 modifiers:
        basic_kmstring &operator+=(const basic_kmstring &str) { return append(str); }

        basic_kmstring &operator+=(const value_type *s) { return append(s); }

        basic_kmstring &operator+=(const value_type c) {
            push_back(c);
            return *this;
        }

        basic_kmstring &operator+=(std::initializer_list<value_type> il) {
            append(il);
            return *this;
        }

        basic_kmstring &append(const basic_kmstring &str);

        basic_kmstring &append(
            const basic_kmstring &str, const size_type pos, size_type n);

        basic_kmstring &append(const value_type *s, size_type n);

        basic_kmstring &append(const value_type *s) {
            return append(s, traitsLength(s));
        }

        basic_kmstring &append(size_type n, value_type c);

        template<class InputIterator>
        basic_kmstring &append(InputIterator first, InputIterator last) {
            insert(end(), first, last);
            return *this;
        }

        basic_kmstring &append(std::initializer_list<value_type> il) {
            return append(il.begin(), il.end());
        }

        void push_back(const value_type c) {
            // primitive
            store_.push_back(c);
        }

        basic_kmstring &assign(const basic_kmstring &str) {
            if (&str == this) {
                return *this;
            }
            return assign(str.data(), str.size());
        }

        basic_kmstring &assign(basic_kmstring &&str) {
            return *this = std::move(str);
        }

        basic_kmstring &assign(
            const basic_kmstring &str, const size_type pos, size_type n);

        basic_kmstring &assign(const value_type *s, const size_type n);

        basic_kmstring &assign(const value_type *s) {
            return assign(s, traitsLength(s));
        }

        basic_kmstring &assign(std::initializer_list<value_type> il) {
            return assign(il.begin(), il.end());
        }

        template<class ItOrLength, class ItOrChar>
        basic_kmstring &assign(ItOrLength first_or_n, ItOrChar last_or_c) {
            return replace(begin(), end(), first_or_n, last_or_c);
        }

        basic_kmstring &insert(size_type pos1, const basic_kmstring &str) {
            return insert(pos1, str.data(), str.size());
        }

        basic_kmstring &insert(
            size_type pos1, const basic_kmstring &str, size_type pos2, size_type n) {
            enforce<std::out_of_range>(pos2 <= str.length(), "");
            procrustes(n, str.length() - pos2);
            return insert(pos1, str.data() + pos2, n);
        }

        basic_kmstring &insert(size_type pos, const value_type *s, size_type n) {
            enforce<std::out_of_range>(pos <= length(), "");
            insert(begin() + pos, s, s + n);
            return *this;
        }

        basic_kmstring &insert(size_type pos, const value_type *s) {
            return insert(pos, s, traitsLength(s));
        }

        basic_kmstring &insert(size_type pos, size_type n, value_type c) {
            enforce<std::out_of_range>(pos <= length(), "");
            insert(begin() + pos, n, c);
            return *this;
        }

        iterator insert(const_iterator p, const value_type c) {
            const size_type pos = p - cbegin();
            insert(p, 1, c);
            return begin() + pos;
        }

    private:
        typedef std::basic_istream<value_type, traits_type> istream_type;

        istream_type &getlineImpl(istream_type &is, value_type delim);

    public:
        friend inline istream_type &getline(
            istream_type &is, basic_kmstring &str, value_type delim) {
            return str.getlineImpl(is, delim);
        }

        friend inline istream_type &getline(istream_type &is, basic_kmstring &str) {
            return getline(is, str, '\n');
        }

    private:
        iterator insertImplDiscr(
            const_iterator i, size_type n, value_type c, std::true_type);

        template<class InputIter>
        iterator insertImplDiscr(
            const_iterator i, InputIter b, InputIter e, std::false_type);

        template<class FwdIterator>
        iterator insertImpl(
            const_iterator i,
            FwdIterator s1,
            FwdIterator s2,
            std::forward_iterator_tag);

        template<class InputIterator>
        iterator insertImpl(
            const_iterator i,
            InputIterator b,
            InputIterator e,
            std::input_iterator_tag);

    public:
        template<class ItOrLength, class ItOrChar>
        iterator insert(const_iterator p, ItOrLength first_or_n, ItOrChar last_or_c) {
            using Sel =
                    std::bool_constant<std::numeric_limits<ItOrLength>::is_specialized>;
            return insertImplDiscr(p, first_or_n, last_or_c, Sel());
        }

        iterator insert(const_iterator p, std::initializer_list<value_type> il) {
            return insert(p, il.begin(), il.end());
        }

        basic_kmstring &erase(size_type pos = 0, size_type n = npos) {
            Invariant checker(*this);

            enforce<std::out_of_range>(pos <= length(), "");
            procrustes(n, length() - pos);
            std::copy(begin() + pos + n, end(), begin() + pos);
            resize(length() - n);
            return *this;
        }

        iterator erase(iterator position) {
            const size_type pos(position - begin());
            enforce<std::out_of_range>(pos <= size(), "");
            erase(pos, 1);
            return begin() + pos;
        }

        iterator erase(iterator first, iterator last) {
            const size_type pos(first - begin());
            erase(pos, last - first);
            return begin() + pos;
        }

        // Replaces at most n1 chars of *this, starting with pos1 with the
        // content of str
        basic_kmstring &replace(
            size_type pos1, size_type n1, const basic_kmstring &str) {
            return replace(pos1, n1, str.data(), str.size());
        }

        // Replaces at most n1 chars of *this, starting with pos1,
        // with at most n2 chars of str starting with pos2
        basic_kmstring &replace(
            size_type pos1,
            size_type n1,
            const basic_kmstring &str,
            size_type pos2,
            size_type n2) {
            enforce<std::out_of_range>(pos2 <= str.length(), "");
            return replace(
                pos1, n1, str.data() + pos2, std::min(n2, str.size() - pos2));
        }

        // Replaces at most n1 chars of *this, starting with pos, with chars from s
        basic_kmstring &replace(size_type pos, size_type n1, const value_type *s) {
            return replace(pos, n1, s, traitsLength(s));
        }

        // Replaces at most n1 chars of *this, starting with pos, with n2
        // occurrences of c
        //
        // consolidated with
        //
        // Replaces at most n1 chars of *this, starting with pos, with at
        // most n2 chars of str.  str must have at least n2 chars.
        template<class StrOrLength, class NumOrChar>
        basic_kmstring &replace(
            size_type pos, size_type n1, StrOrLength s_or_n2, NumOrChar n_or_c) {
            Invariant checker(*this);

            enforce<std::out_of_range>(pos <= size(), "");
            procrustes(n1, length() - pos);
            const iterator b = begin() + pos;
            return replace(b, b + n1, s_or_n2, n_or_c);
        }

        basic_kmstring &replace(iterator i1, iterator i2, const basic_kmstring &str) {
            return replace(i1, i2, str.data(), str.length());
        }

        basic_kmstring &replace(iterator i1, iterator i2, const value_type *s) {
            return replace(i1, i2, s, traitsLength(s));
        }

    private:
        basic_kmstring &replaceImplDiscr(
            iterator i1,
            iterator i2,
            const value_type *s,
            size_type n,
            std::integral_constant<int, 2>);

        basic_kmstring &replaceImplDiscr(
            iterator i1,
            iterator i2,
            size_type n2,
            value_type c,
            std::integral_constant<int, 1>);

        template<class InputIter>
        basic_kmstring &replaceImplDiscr(
            iterator i1,
            iterator i2,
            InputIter b,
            InputIter e,
            std::integral_constant<int, 0>);

    private:
        template<class FwdIterator>
        bool replaceAliased(
            iterator /* i1 */,
            iterator /* i2 */,
            FwdIterator /* s1 */,
            FwdIterator /* s2 */,
            std::false_type) {
            return false;
        }

        template<class FwdIterator>
        bool replaceAliased(
            iterator i1, iterator i2, FwdIterator s1, FwdIterator s2, std::true_type);

        template<class FwdIterator>
        void replaceImpl(
            iterator i1,
            iterator i2,
            FwdIterator s1,
            FwdIterator s2,
            std::forward_iterator_tag);

        template<class InputIterator>
        void replaceImpl(
            iterator i1,
            iterator i2,
            InputIterator b,
            InputIterator e,
            std::input_iterator_tag);

    public:
        template<class T1, class T2>
        basic_kmstring &replace(
            iterator i1, iterator i2, T1 first_or_n_or_s, T2 last_or_c_or_n) {
            constexpr bool num1 = std::numeric_limits<T1>::is_specialized,
                    num2 = std::numeric_limits<T2>::is_specialized;
            using Sel =
                    std::integral_constant<int, num1 ? (num2 ? 1 : -1) : (num2 ? 2 : 0)>;
            return replaceImplDiscr(i1, i2, first_or_n_or_s, last_or_c_or_n, Sel());
        }

        size_type copy(value_type *s, size_type n, size_type pos = 0) const {
            enforce<std::out_of_range>(pos <= size(), "");
            procrustes(n, size() - pos);

            if (n != 0) {
                kmstring_detail::podCopy(data() + pos, data() + pos + n, s);
            }
            return n;
        }

        void swap(basic_kmstring &rhs) { store_.swap(rhs.store_); }

        const value_type *c_str() const { return store_.c_str(); }

        const value_type *data() const { return c_str(); }

        value_type *data() { return store_.data(); }

        allocator_type get_allocator() const { return allocator_type(); }

        size_type find(const basic_kmstring &str, size_type pos = 0) const {
            return find(str.data(), pos, str.length());
        }

        size_type find(
            const value_type *needle, size_type pos, size_type nsize) const;

        size_type find(const value_type *s, size_type pos = 0) const {
            return find(s, pos, traitsLength(s));
        }

        size_type find(value_type c, size_type pos = 0) const {
            return find(&c, pos, 1);
        }

        size_type rfind(const basic_kmstring &str, size_type pos = npos) const {
            return rfind(str.data(), pos, str.length());
        }

        size_type rfind(const value_type *s, size_type pos, size_type n) const;

        size_type rfind(const value_type *s, size_type pos = npos) const {
            return rfind(s, pos, traitsLength(s));
        }

        size_type rfind(value_type c, size_type pos = npos) const {
            return rfind(&c, pos, 1);
        }

        size_type find_first_of(const basic_kmstring &str, size_type pos = 0) const {
            return find_first_of(str.data(), pos, str.length());
        }

        size_type find_first_of(
            const value_type *s, size_type pos, size_type n) const;

        size_type find_first_of(const value_type *s, size_type pos = 0) const {
            return find_first_of(s, pos, traitsLength(s));
        }

        size_type find_first_of(value_type c, size_type pos = 0) const {
            return find_first_of(&c, pos, 1);
        }

        size_type find_last_of(
            const basic_kmstring &str, size_type pos = npos) const {
            return find_last_of(str.data(), pos, str.length());
        }

        size_type find_last_of(const value_type *s, size_type pos, size_type n) const;

        size_type find_last_of(const value_type *s, size_type pos = npos) const {
            return find_last_of(s, pos, traitsLength(s));
        }

        size_type find_last_of(value_type c, size_type pos = npos) const {
            return find_last_of(&c, pos, 1);
        }

        size_type find_first_not_of(
            const basic_kmstring &str, size_type pos = 0) const {
            return find_first_not_of(str.data(), pos, str.size());
        }

        size_type find_first_not_of(
            const value_type *s, size_type pos, size_type n) const;

        size_type find_first_not_of(const value_type *s, size_type pos = 0) const {
            return find_first_not_of(s, pos, traitsLength(s));
        }

        size_type find_first_not_of(value_type c, size_type pos = 0) const {
            return find_first_not_of(&c, pos, 1);
        }

        size_type find_last_not_of(
            const basic_kmstring &str, size_type pos = npos) const {
            return find_last_not_of(str.data(), pos, str.length());
        }

        size_type find_last_not_of(
            const value_type *s, size_type pos, size_type n) const;

        size_type find_last_not_of(const value_type *s, size_type pos = npos) const {
            return find_last_not_of(s, pos, traitsLength(s));
        }

        size_type find_last_not_of(value_type c, size_type pos = npos) const {
            return find_last_not_of(&c, pos, 1);
        }

        basic_kmstring substr(size_type pos = 0, size_type n = npos) const & {
            enforce<std::out_of_range>(pos <= size(), "");
            return basic_kmstring(data() + pos, std::min(n, size() - pos));
        }

        basic_kmstring substr(size_type pos = 0, size_type n = npos) && {
            enforce<std::out_of_range>(pos <= size(), "");
            erase(0, pos);
            if (n < size()) {
                resize(n);
            }
            return std::move(*this);
        }

        int compare(const basic_kmstring &str) const {
            // FIX due to Goncalo N M de Carvalho July 18, 2005
            return compare(0, size(), str);
        }

        int compare(size_type pos1, size_type n1, const basic_kmstring &str) const {
            return compare(pos1, n1, str.data(), str.size());
        }

        int compare(size_type pos1, size_type n1, const value_type *s) const {
            return compare(pos1, n1, s, traitsLength(s));
        }

        int compare(
            size_type pos1, size_type n1, const value_type *s, size_type n2) const {
            enforce<std::out_of_range>(pos1 <= size(), "");
            procrustes(n1, size() - pos1);
            // The line below fixed by Jean-Francois Bastien, 04-23-2007. Thanks!
            const int r = traits_type::compare(pos1 + data(), s, std::min(n1, n2));
            return r != 0 ? r : n1 > n2 ? 1 : n1 < n2 ? -1 : 0;
        }

        int compare(
            size_type pos1,
            size_type n1,
            const basic_kmstring &str,
            size_type pos2,
            size_type n2) const {
            enforce<std::out_of_range>(pos2 <= str.size(), "");
            return compare(
                pos1, n1, str.data() + pos2, std::min(n2, str.size() - pos2));
        }

        // Code from Jean-Francois Bastien (03/26/2007)
        int compare(const value_type *s) const {
            // Could forward to compare(0, size(), s, traitsLength(s))
            // but that does two extra checks
            const size_type n1(size()), n2(traitsLength(s));
            const int r = traits_type::compare(data(), s, std::min(n1, n2));
            return r != 0 ? r : n1 > n2 ? 1 : n1 < n2 ? -1 : 0;
        }

#if MELON_CPLUSPLUS >= 202002L
  friend auto operator<=>(
      const basic_kmstring& lhs, const basic_kmstring& rhs) {
    return lhs.spaceship(rhs.data(), rhs.size());
  }
  friend auto operator<=>(const basic_kmstring& lhs, const char* rhs) {
    return lhs.spaceship(rhs, traitsLength(rhs));
  }
  template <typename A2>
  friend auto operator<=>(
      const basic_kmstring& lhs, const std::basic_string<E, T, A2>& rhs) {
    return lhs.spaceship(rhs.data(), rhs.size());
  }
#endif // MELON_CPLUSPLUS >= 202002L

    private:
#if MELON_CPLUSPLUS >= 202002L
  auto spaceship(const value_type* rhsData, size_type rhsSize) const {
    auto c = compare(0, size(), rhsData, rhsSize);
    if (c == 0) {
      return std::strong_ordering::equal;
    } else if (c < 0) {
      return std::strong_ordering::less;
    } else {
      return std::strong_ordering::greater;
    }
  }
#endif // MELON_CPLUSPLUS >= 202002L

        // Data
        Storage store_;
    };

    template<typename E, class T, class A, class S>
    MELON_NOINLINE typename basic_kmstring<E, T, A, S>::size_type
    basic_kmstring<E, T, A, S>::traitsLength(const value_type *s) {
        return s
                   ? traits_type::length(s)
                   : (throw_exception<std::logic_error>(
                          "basic_kmstring: null pointer initializer not valid"),
                      0);
    }

    template<typename E, class T, class A, class S>
    inline basic_kmstring<E, T, A, S> &basic_kmstring<E, T, A, S>::operator=(
        const basic_kmstring &lhs) {
        Invariant checker(*this);

        if (MELON_UNLIKELY(&lhs == this)) {
            return *this;
        }

        return assign(lhs.data(), lhs.size());
    }

    // Move assignment
    template<typename E, class T, class A, class S>
    inline basic_kmstring<E, T, A, S> &basic_kmstring<E, T, A, S>::operator=(
        basic_kmstring &&goner) noexcept {
        if (MELON_UNLIKELY(&goner == this)) {
            // Compatibility with std::basic_string<>,
            // C++11 21.4.2 [string.cons] / 23 requires self-move-assignment support.
            return *this;
        }
        // No need of this anymore
        this->~basic_kmstring();
        // Move the goner into this
        new(&store_) S(std::move(goner.store_));
        return *this;
    }

    template<typename E, class T, class A, class S>
    inline basic_kmstring<E, T, A, S> &basic_kmstring<E, T, A, S>::operator=(
        value_type c) {
        Invariant checker(*this);

        if (empty()) {
            store_.expandNoinit(1);
        } else if (store_.isShared()) {
            basic_kmstring(1, c).swap(*this);
            return *this;
        } else {
            store_.shrink(size() - 1);
        }
        front() = c;
        return *this;
    }

    template<typename E, class T, class A, class S>
    inline void basic_kmstring<E, T, A, S>::resize(
        const size_type n, const value_type c /*= value_type()*/) {
        Invariant checker(*this);

        auto size = this->size();
        if (n <= size) {
            store_.shrink(size - n);
        } else {
            auto const delta = n - size;
            auto pData = store_.expandNoinit(delta);
            kmstring_detail::podFill(pData, pData + delta, c);
        }
        assert(this->size() == n);
    }

    template<typename E, class T, class A, class S>
    inline basic_kmstring<E, T, A, S> &basic_kmstring<E, T, A, S>::append(
        const basic_kmstring &str) {
#ifndef NDEBUG
        auto desiredSize = size() + str.size();
#endif
        append(str.data(), str.size());
        assert(size() == desiredSize);
        return *this;
    }

    template<typename E, class T, class A, class S>
    inline basic_kmstring<E, T, A, S> &basic_kmstring<E, T, A, S>::append(
        const basic_kmstring &str, const size_type pos, size_type n) {
        const size_type sz = str.size();
        enforce<std::out_of_range>(pos <= sz, "");
        procrustes(n, sz - pos);
        return append(str.data() + pos, n);
    }

    template<typename E, class T, class A, class S>
    MELON_NOINLINE basic_kmstring<E, T, A, S> &basic_kmstring<E, T, A, S>::append(
        const value_type *s, size_type n) {
        Invariant checker(*this);

        if (MELON_UNLIKELY(!n)) {
            // Unlikely but must be done
            return *this;
        }
        auto const oldSize = size();
        auto const oldData = data();
        auto pData = store_.expandNoinit(n, /* expGrowth = */ true);

        // Check for aliasing (rare). We could use "<=" here but in theory
        // those do not work for pointers unless the pointers point to
        // elements in the same array. For that reason we use
        // std::less_equal, which is guaranteed to offer a total order
        // over pointers. See discussion at http://goo.gl/Cy2ya for more
        // info.
        std::less_equal<const value_type *> le;
        if (MELON_UNLIKELY(le(oldData, s) && !le(oldData + oldSize, s))) {
            assert(le(s + n, oldData + oldSize));
            // expandNoinit() could have moved the storage, restore the source.
            s = data() + (s - oldData);
            kmstring_detail::podMove(s, s + n, pData);
        } else {
            kmstring_detail::podCopy(s, s + n, pData);
        }

        assert(size() == oldSize + n);
        return *this;
    }

    template<typename E, class T, class A, class S>
    inline basic_kmstring<E, T, A, S> &basic_kmstring<E, T, A, S>::append(
        size_type n, value_type c) {
        Invariant checker(*this);
        auto pData = store_.expandNoinit(n, /* expGrowth = */ true);
        kmstring_detail::podFill(pData, pData + n, c);
        return *this;
    }

    template<typename E, class T, class A, class S>
    inline basic_kmstring<E, T, A, S> &basic_kmstring<E, T, A, S>::assign(
        const basic_kmstring &str, const size_type pos, size_type n) {
        const size_type sz = str.size();
        enforce<std::out_of_range>(pos <= sz, "");
        procrustes(n, sz - pos);
        return assign(str.data() + pos, n);
    }

    template<typename E, class T, class A, class S>
    MELON_NOINLINE basic_kmstring<E, T, A, S> &basic_kmstring<E, T, A, S>::assign(
        const value_type *s, const size_type n) {
        Invariant checker(*this);

        if (n == 0) {
            resize(0);
        } else if (size() >= n) {
            // s can alias this, we need to use podMove.
            kmstring_detail::podMove(s, s + n, store_.mutableData());
            store_.shrink(size() - n);
            assert(size() == n);
        } else {
            // If n is larger than size(), s cannot alias this string's
            // storage.
            resize(0);
            // Do not use exponential growth here: assign() should be tight,
            // to mirror the behavior of the equivalent constructor.
            kmstring_detail::podCopy(s, s + n, store_.expandNoinit(n));
        }

        assert(size() == n);
        return *this;
    }

    template<typename E, class T, class A, class S>
    inline typename basic_kmstring<E, T, A, S>::istream_type &
    basic_kmstring<E, T, A, S>::getlineImpl(istream_type &is, value_type delim) {
        Invariant checker(*this);

        clear();
        size_t size = 0;
        while (true) {
            size_t avail = capacity() - size;
            // kmstring has 1 byte extra capacity for the null terminator,
            // and getline null-terminates the read string.
            is.getline(store_.expandNoinit(avail), avail + 1, delim);
            size += is.gcount();

            if (is.bad() || is.eof() || !is.fail()) {
                // Done by either failure, end of file, or normal read.
                if (!is.bad() && !is.eof()) {
                    --size; // gcount() also accounts for the delimiter.
                }
                resize(size);
                break;
            }

            assert(size == this->size());
            assert(size == capacity());
            // Start at minimum allocation 63 + terminator = 64.
            reserve(std::max<size_t>(63, 3 * size / 2));
            // Clear the error so we can continue reading.
            is.clear();
        }
        return is;
    }

    template<typename E, class T, class A, class S>
    inline typename basic_kmstring<E, T, A, S>::size_type
    basic_kmstring<E, T, A, S>::find(
        const value_type *needle,
        const size_type pos,
        const size_type nsize) const {
        auto const size = this->size();
        // nsize + pos can overflow (eg pos == npos), guard against that by checking
        // that nsize + pos does not wrap around.
        if (nsize + pos > size || nsize + pos < pos) {
            return npos;
        }

        if (nsize == 0) {
            return pos;
        }
        // Don't use std::search, use a Boyer-Moore-like trick by comparing
        // the last characters first
        auto const haystack = data();
        auto const nsize_1 = nsize - 1;
        auto const lastNeedle = needle[nsize_1];

        // Boyer-Moore skip value for the last char in the needle. Zero is
        // not a valid value; skip will be computed the first time it's
        // needed.
        size_type skip = 0;

        const E *i = haystack + pos;
        auto iEnd = haystack + size - nsize_1;

        while (i < iEnd) {
            // Boyer-Moore: match the last element in the needle
            while (i[nsize_1] != lastNeedle) {
                if (++i == iEnd) {
                    // not found
                    return npos;
                }
            }
            // Here we know that the last char matches
            // Continue in pedestrian mode
            for (size_t j = 0;;) {
                assert(j < nsize);
                if (i[j] != needle[j]) {
                    // Not found, we can skip
                    // Compute the skip value lazily
                    if (skip == 0) {
                        skip = 1;
                        while (skip <= nsize_1 && needle[nsize_1 - skip] != lastNeedle) {
                            ++skip;
                        }
                    }
                    i += skip;
                    break;
                }
                // Check if done searching
                if (++j == nsize) {
                    // Yay
                    return i - haystack;
                }
            }
        }
        return npos;
    }

    template<typename E, class T, class A, class S>
    inline typename basic_kmstring<E, T, A, S>::iterator
    basic_kmstring<E, T, A, S>::insertImplDiscr(
        const_iterator i, size_type n, value_type c, std::true_type) {
        Invariant checker(*this);

        assert(i >= cbegin() && i <= cend());
        const size_type pos = i - cbegin();

        auto oldSize = size();
        store_.expandNoinit(n, /* expGrowth = */ true);
        auto b = begin();
        kmstring_detail::podMove(b + pos, b + oldSize, b + pos + n);
        kmstring_detail::podFill(b + pos, b + pos + n, c);

        return b + pos;
    }

    template<typename E, class T, class A, class S>
    template<class InputIter>
    inline typename basic_kmstring<E, T, A, S>::iterator
    basic_kmstring<E, T, A, S>::insertImplDiscr(
        const_iterator i, InputIter b, InputIter e, std::false_type) {
        return insertImpl(
            i, b, e, typename std::iterator_traits<InputIter>::iterator_category());
    }

    template<typename E, class T, class A, class S>
    template<class FwdIterator>
    inline typename basic_kmstring<E, T, A, S>::iterator
    basic_kmstring<E, T, A, S>::insertImpl(
        const_iterator i,
        FwdIterator s1,
        FwdIterator s2,
        std::forward_iterator_tag) {
        Invariant checker(*this);

        assert(i >= cbegin() && i <= cend());
        const size_type pos = i - cbegin();
        auto n = std::distance(s1, s2);
        assert(n >= 0);

        auto oldSize = size();
        store_.expandNoinit(n, /* expGrowth = */ true);
        auto b = begin();
        kmstring_detail::podMove(b + pos, b + oldSize, b + pos + n);
        std::copy(s1, s2, b + pos);

        return b + pos;
    }

    template<typename E, class T, class A, class S>
    template<class InputIterator>
    inline typename basic_kmstring<E, T, A, S>::iterator
    basic_kmstring<E, T, A, S>::insertImpl(
        const_iterator i,
        InputIterator b,
        InputIterator e,
        std::input_iterator_tag) {
        const auto pos = i - cbegin();
        basic_kmstring temp(cbegin(), i);
        for (; b != e; ++b) {
            temp.push_back(*b);
        }
        temp.append(i, cend());
        swap(temp);
        return begin() + pos;
    }

    template<typename E, class T, class A, class S>
    inline basic_kmstring<E, T, A, S> &basic_kmstring<E, T, A, S>::replaceImplDiscr(
        iterator i1,
        iterator i2,
        const value_type *s,
        size_type n,
        std::integral_constant<int, 2>) {
        assert(i1 <= i2);
        assert(begin() <= i1 && i1 <= end());
        assert(begin() <= i2 && i2 <= end());
        return replace(i1, i2, s, s + n);
    }

    template<typename E, class T, class A, class S>
    inline basic_kmstring<E, T, A, S> &basic_kmstring<E, T, A, S>::replaceImplDiscr(
        iterator i1,
        iterator i2,
        size_type n2,
        value_type c,
        std::integral_constant<int, 1>) {
        const size_type n1 = i2 - i1;
        if (n1 > n2) {
            std::fill(i1, i1 + n2, c);
            erase(i1 + n2, i2);
        } else {
            std::fill(i1, i2, c);
            insert(i2, n2 - n1, c);
        }
        assert(isSane());
        return *this;
    }

    template<typename E, class T, class A, class S>
    template<class InputIter>
    inline basic_kmstring<E, T, A, S> &basic_kmstring<E, T, A, S>::replaceImplDiscr(
        iterator i1,
        iterator i2,
        InputIter b,
        InputIter e,
        std::integral_constant<int, 0>) {
        using Cat = typename std::iterator_traits<InputIter>::iterator_category;
        replaceImpl(i1, i2, b, e, Cat());
        return *this;
    }

    template<typename E, class T, class A, class S>
    template<class FwdIterator>
    inline bool basic_kmstring<E, T, A, S>::replaceAliased(
        iterator i1, iterator i2, FwdIterator s1, FwdIterator s2, std::true_type) {
        std::less_equal<const value_type *> le{};
        const bool aliased = le(&*begin(), &*s1) && le(&*s1, &*end());
        if (!aliased) {
            return false;
        }
        // Aliased replace, copy to new string
        basic_kmstring temp;
        temp.reserve(size() - (i2 - i1) + std::distance(s1, s2));
        temp.append(begin(), i1).append(s1, s2).append(i2, end());
        swap(temp);
        return true;
    }

    template<typename E, class T, class A, class S>
    template<class FwdIterator>
    inline void basic_kmstring<E, T, A, S>::replaceImpl(
        iterator i1,
        iterator i2,
        FwdIterator s1,
        FwdIterator s2,
        std::forward_iterator_tag) {
        Invariant checker(*this);

        // Handle aliased replace
        using Sel = std::bool_constant<
            std::is_same<FwdIterator, iterator>::value ||
            std::is_same<FwdIterator, const_iterator>::value>;
        if (replaceAliased(i1, i2, s1, s2, Sel())) {
            return;
        }

        auto const n1 = i2 - i1;
        assert(n1 >= 0);
        auto const n2 = std::distance(s1, s2);
        assert(n2 >= 0);

        if (n1 > n2) {
            // shrinks
            std::copy(s1, s2, i1);
            erase(i1 + n2, i2);
        } else {
            // grows
            s1 = kmstring_detail::copy_n(s1, n1, i1).first;
            insert(i2, s1, s2);
        }
        assert(isSane());
    }

    template<typename E, class T, class A, class S>
    template<class InputIterator>
    inline void basic_kmstring<E, T, A, S>::replaceImpl(
        iterator i1,
        iterator i2,
        InputIterator b,
        InputIterator e,
        std::input_iterator_tag) {
        basic_kmstring temp(begin(), i1);
        temp.append(b, e).append(i2, end());
        swap(temp);
    }

    template<typename E, class T, class A, class S>
    inline typename basic_kmstring<E, T, A, S>::size_type
    basic_kmstring<E, T, A, S>::rfind(
        const value_type *s, size_type pos, size_type n) const {
        if (n > length()) {
            return npos;
        }
        pos = std::min(pos, length() - n);
        if (n == 0) {
            return pos;
        }

        const_iterator i(begin() + pos);
        for (;; --i) {
            if (traits_type::eq(*i, *s) && traits_type::compare(&*i, s, n) == 0) {
                return i - begin();
            }
            if (i == begin()) {
                break;
            }
        }
        return npos;
    }

    template<typename E, class T, class A, class S>
    inline typename basic_kmstring<E, T, A, S>::size_type
    basic_kmstring<E, T, A, S>::find_first_of(
        const value_type *s, size_type pos, size_type n) const {
        if (pos > length() || n == 0) {
            return npos;
        }
        const_iterator i(begin() + pos), finish(end());
        for (; i != finish; ++i) {
            if (traits_type::find(s, n, *i) != nullptr) {
                return i - begin();
            }
        }
        return npos;
    }

    template<typename E, class T, class A, class S>
    inline typename basic_kmstring<E, T, A, S>::size_type
    basic_kmstring<E, T, A, S>::find_last_of(
        const value_type *s, size_type pos, size_type n) const {
        if (!empty() && n > 0) {
            pos = std::min(pos, length() - 1);
            const_iterator i(begin() + pos);
            for (;; --i) {
                if (traits_type::find(s, n, *i) != nullptr) {
                    return i - begin();
                }
                if (i == begin()) {
                    break;
                }
            }
        }
        return npos;
    }

    template<typename E, class T, class A, class S>
    inline typename basic_kmstring<E, T, A, S>::size_type
    basic_kmstring<E, T, A, S>::find_first_not_of(
        const value_type *s, size_type pos, size_type n) const {
        if (pos < length()) {
            const_iterator i(begin() + pos), finish(end());
            for (; i != finish; ++i) {
                if (traits_type::find(s, n, *i) == nullptr) {
                    return i - begin();
                }
            }
        }
        return npos;
    }

    template<typename E, class T, class A, class S>
    inline typename basic_kmstring<E, T, A, S>::size_type
    basic_kmstring<E, T, A, S>::find_last_not_of(
        const value_type *s, size_type pos, size_type n) const {
        if (!this->empty()) {
            pos = std::min(pos, size() - 1);
            const_iterator i(begin() + pos);
            for (;; --i) {
                if (traits_type::find(s, n, *i) == nullptr) {
                    return i - begin();
                }
                if (i == begin()) {
                    break;
                }
            }
        }
        return npos;
    }

    // non-member functions
    // C++11 21.4.8.1/1
    template<typename E, class T, class A, class S>
    inline basic_kmstring<E, T, A, S> operator+(
        const basic_kmstring<E, T, A, S> &lhs,
        const basic_kmstring<E, T, A, S> &rhs) {
        basic_kmstring<E, T, A, S> result;
        result.reserve(lhs.size() + rhs.size());
        result.append(lhs).append(rhs);
        return result;
    }

    // C++11 21.4.8.1/2
    template<typename E, class T, class A, class S>
    inline basic_kmstring<E, T, A, S> operator+(
        basic_kmstring<E, T, A, S> &&lhs, const basic_kmstring<E, T, A, S> &rhs) {
        return std::move(lhs.append(rhs));
    }

    // C++11 21.4.8.1/3
    template<typename E, class T, class A, class S>
    inline basic_kmstring<E, T, A, S> operator+(
        const basic_kmstring<E, T, A, S> &lhs, basic_kmstring<E, T, A, S> &&rhs) {
        if (rhs.capacity() >= lhs.size() + rhs.size()) {
            // Good, at least we don't need to reallocate
            return std::move(rhs.insert(0, lhs));
        }
        // Meh, no go. Forward to operator+(const&, const&).
        auto const &rhsC = rhs;
        return lhs + rhsC;
    }

    // C++11 21.4.8.1/4
    template<typename E, class T, class A, class S>
    inline basic_kmstring<E, T, A, S> operator+(
        basic_kmstring<E, T, A, S> &&lhs, basic_kmstring<E, T, A, S> &&rhs) {
        return std::move(lhs.append(rhs));
    }

    // C++11 21.4.8.1/5
    template<typename E, class T, class A, class S>
    inline basic_kmstring<E, T, A, S> operator+(
        const E *lhs, const basic_kmstring<E, T, A, S> &rhs) {
        //
        basic_kmstring<E, T, A, S> result;
        const auto len = basic_kmstring<E, T, A, S>::traits_type::length(lhs);
        result.reserve(len + rhs.size());
        result.append(lhs, len).append(rhs);
        return result;
    }

    // C++11 21.4.8.1/6
    template<typename E, class T, class A, class S>
    inline basic_kmstring<E, T, A, S> operator+(
        const E *lhs, basic_kmstring<E, T, A, S> &&rhs) {
        //
        const auto len = basic_kmstring<E, T, A, S>::traits_type::length(lhs);
        if (rhs.capacity() >= len + rhs.size()) {
            // Good, at least we don't need to reallocate
            rhs.insert(rhs.begin(), lhs, lhs + len);
            return std::move(rhs);
        }
        // Meh, no go. Do it by hand since we have len already.
        basic_kmstring<E, T, A, S> result;
        result.reserve(len + rhs.size());
        result.append(lhs, len).append(rhs);
        return result;
    }

    // C++11 21.4.8.1/7
    template<typename E, class T, class A, class S>
    inline basic_kmstring<E, T, A, S> operator+(
        E lhs, const basic_kmstring<E, T, A, S> &rhs) {
        basic_kmstring<E, T, A, S> result;
        result.reserve(1 + rhs.size());
        result.push_back(lhs);
        result.append(rhs);
        return result;
    }

    // C++11 21.4.8.1/8
    template<typename E, class T, class A, class S>
    inline basic_kmstring<E, T, A, S> operator+(
        E lhs, basic_kmstring<E, T, A, S> &&rhs) {
        //
        if (rhs.capacity() > rhs.size()) {
            // Good, at least we don't need to reallocate
            rhs.insert(rhs.begin(), lhs);
            return std::move(rhs);
        }
        // Meh, no go. Forward to operator+(E, const&).
        auto const &rhsC = rhs;
        return lhs + rhsC;
    }

    // C++11 21.4.8.1/9
    template<typename E, class T, class A, class S>
    inline basic_kmstring<E, T, A, S> operator+(
        const basic_kmstring<E, T, A, S> &lhs, const E *rhs) {
        typedef typename basic_kmstring<E, T, A, S>::size_type size_type;
        typedef typename basic_kmstring<E, T, A, S>::traits_type traits_type;

        basic_kmstring<E, T, A, S> result;
        const size_type len = traits_type::length(rhs);
        result.reserve(lhs.size() + len);
        result.append(lhs).append(rhs, len);
        return result;
    }

    // C++11 21.4.8.1/10
    template<typename E, class T, class A, class S>
    inline basic_kmstring<E, T, A, S> operator+(
        basic_kmstring<E, T, A, S> &&lhs, const E *rhs) {
        //
        return std::move(lhs += rhs);
    }

    // C++11 21.4.8.1/11
    template<typename E, class T, class A, class S>
    inline basic_kmstring<E, T, A, S> operator+(
        const basic_kmstring<E, T, A, S> &lhs, E rhs) {
        basic_kmstring<E, T, A, S> result;
        result.reserve(lhs.size() + 1);
        result.append(lhs);
        result.push_back(rhs);
        return result;
    }

    // C++11 21.4.8.1/12
    template<typename E, class T, class A, class S>
    inline basic_kmstring<E, T, A, S> operator+(
        basic_kmstring<E, T, A, S> &&lhs, E rhs) {
        //
        return std::move(lhs += rhs);
    }

    template<typename E, class T, class A, class S>
    inline bool operator==(
        const basic_kmstring<E, T, A, S> &lhs,
        const basic_kmstring<E, T, A, S> &rhs) {
        return lhs.size() == rhs.size() && lhs.compare(rhs) == 0;
    }

    template<typename E, class T, class A, class S>
    inline bool operator==(std::nullptr_t, const basic_kmstring<E, T, A, S> &) = delete;

    template<typename E, class T, class A, class S>
    inline bool operator==(
        const typename basic_kmstring<E, T, A, S>::value_type *lhs,
        const basic_kmstring<E, T, A, S> &rhs) {
        return rhs == lhs;
    }

    template<typename E, class T, class A, class S>
    inline bool operator==(const basic_kmstring<E, T, A, S> &, std::nullptr_t) = delete;

    template<typename E, class T, class A, class S>
    inline bool operator==(
        const basic_kmstring<E, T, A, S> &lhs,
        const typename basic_kmstring<E, T, A, S>::value_type *rhs) {
        return lhs.compare(rhs) == 0;
    }

    template<typename E, class T, class A, class S>
    inline bool operator!=(
        const basic_kmstring<E, T, A, S> &lhs,
        const basic_kmstring<E, T, A, S> &rhs) {
        return !(lhs == rhs);
    }

    template<typename E, class T, class A, class S>
    inline bool operator!=(std::nullptr_t, const basic_kmstring<E, T, A, S> &) = delete;

    template<typename E, class T, class A, class S>
    inline bool operator!=(
        const typename basic_kmstring<E, T, A, S>::value_type *lhs,
        const basic_kmstring<E, T, A, S> &rhs) {
        return !(lhs == rhs);
    }

    template<typename E, class T, class A, class S>
    inline bool operator!=(const basic_kmstring<E, T, A, S> &, std::nullptr_t) = delete;

    template<typename E, class T, class A, class S>
    inline bool operator!=(
        const basic_kmstring<E, T, A, S> &lhs,
        const typename basic_kmstring<E, T, A, S>::value_type *rhs) {
        return !(lhs == rhs);
    }

    template<typename E, class T, class A, class S>
    inline bool operator<(
        const basic_kmstring<E, T, A, S> &lhs,
        const basic_kmstring<E, T, A, S> &rhs) {
        return lhs.compare(rhs) < 0;
    }

    template<typename E, class T, class A, class S>
    inline bool operator<(const basic_kmstring<E, T, A, S> &, std::nullptr_t) = delete;

    template<typename E, class T, class A, class S>
    inline bool operator<(
        const basic_kmstring<E, T, A, S> &lhs,
        const typename basic_kmstring<E, T, A, S>::value_type *rhs) {
        return lhs.compare(rhs) < 0;
    }

    template<typename E, class T, class A, class S>
    inline bool operator<(std::nullptr_t, const basic_kmstring<E, T, A, S> &) = delete;

    template<typename E, class T, class A, class S>
    inline bool operator<(
        const typename basic_kmstring<E, T, A, S>::value_type *lhs,
        const basic_kmstring<E, T, A, S> &rhs) {
        return rhs.compare(lhs) > 0;
    }

    template<typename E, class T, class A, class S>
    inline bool operator>(
        const basic_kmstring<E, T, A, S> &lhs,
        const basic_kmstring<E, T, A, S> &rhs) {
        return rhs < lhs;
    }

    template<typename E, class T, class A, class S>
    inline bool operator>(const basic_kmstring<E, T, A, S> &, std::nullptr_t) = delete;

    template<typename E, class T, class A, class S>
    inline bool operator>(
        const basic_kmstring<E, T, A, S> &lhs,
        const typename basic_kmstring<E, T, A, S>::value_type *rhs) {
        return rhs < lhs;
    }

    template<typename E, class T, class A, class S>
    inline bool operator>(std::nullptr_t, const basic_kmstring<E, T, A, S> &) = delete;

    template<typename E, class T, class A, class S>
    inline bool operator>(
        const typename basic_kmstring<E, T, A, S>::value_type *lhs,
        const basic_kmstring<E, T, A, S> &rhs) {
        return rhs < lhs;
    }

    template<typename E, class T, class A, class S>
    inline bool operator<=(
        const basic_kmstring<E, T, A, S> &lhs,
        const basic_kmstring<E, T, A, S> &rhs) {
        return !(rhs < lhs);
    }

    template<typename E, class T, class A, class S>
    inline bool operator<=(const basic_kmstring<E, T, A, S> &, std::nullptr_t) = delete;

    template<typename E, class T, class A, class S>
    inline bool operator<=(
        const basic_kmstring<E, T, A, S> &lhs,
        const typename basic_kmstring<E, T, A, S>::value_type *rhs) {
        return !(rhs < lhs);
    }

    template<typename E, class T, class A, class S>
    inline bool operator<=(std::nullptr_t, const basic_kmstring<E, T, A, S> &) = delete;

    template<typename E, class T, class A, class S>
    inline bool operator<=(
        const typename basic_kmstring<E, T, A, S>::value_type *lhs,
        const basic_kmstring<E, T, A, S> &rhs) {
        return !(rhs < lhs);
    }

    template<typename E, class T, class A, class S>
    inline bool operator>=(
        const basic_kmstring<E, T, A, S> &lhs,
        const basic_kmstring<E, T, A, S> &rhs) {
        return !(lhs < rhs);
    }

    template<typename E, class T, class A, class S>
    inline bool operator>=(const basic_kmstring<E, T, A, S> &, std::nullptr_t) = delete;

    template<typename E, class T, class A, class S>
    inline bool operator>=(
        const basic_kmstring<E, T, A, S> &lhs,
        const typename basic_kmstring<E, T, A, S>::value_type *rhs) {
        return !(lhs < rhs);
    }

    template<typename E, class T, class A, class S>
    inline bool operator>=(std::nullptr_t, const basic_kmstring<E, T, A, S> &) = delete;

    template<typename E, class T, class A, class S>
    inline bool operator>=(
        const typename basic_kmstring<E, T, A, S>::value_type *lhs,
        const basic_kmstring<E, T, A, S> &rhs) {
        return !(lhs < rhs);
    }

#if MELON_CPLUSPLUS >= 202002
template <typename E, class T, class A, class S>
inline bool operator<=>(std::nullptr_t, const basic_kmstring<E, T, A, S>&) =
    delete;

template <typename E, class T, class A, class S>
inline bool operator<=>(const basic_kmstring<E, T, A, S>&, std::nullptr_t) =
    delete;
#endif

    // C++11 21.4.8.8
    template<typename E, class T, class A, class S>
    void swap(basic_kmstring<E, T, A, S> &lhs, basic_kmstring<E, T, A, S> &rhs) {
        lhs.swap(rhs);
    }

    // TODO: make this faster.
    template<typename E, class T, class A, class S>
    inline std::basic_istream<
        typename basic_kmstring<E, T, A, S>::value_type,
        typename basic_kmstring<E, T, A, S>::traits_type> &
    operator>>(
        std::basic_istream<
            typename basic_kmstring<E, T, A, S>::value_type,
            typename basic_kmstring<E, T, A, S>::traits_type> &is,
        basic_kmstring<E, T, A, S> &str) {
        typedef std::basic_istream<
                    typename basic_kmstring<E, T, A, S>::value_type,
                    typename basic_kmstring<E, T, A, S>::traits_type>
                _istream_type;
        typename _istream_type::sentry sentry(is);
        size_t extracted = 0;
        typename _istream_type::iostate err = _istream_type::goodbit;
        if (sentry) {
            auto n = is.width();
            if (n <= 0) {
                n = str.max_size();
            }
            str.erase();
            for (auto got = is.rdbuf()->sgetc(); extracted != size_t(n); ++extracted) {
                if (got == T::eof()) {
                    err |= _istream_type::eofbit;
                    is.width(0);
                    break;
                }
                if (isspace(got)) {
                    break;
                }
                str.push_back(got);
                got = is.rdbuf()->snextc();
            }
        }
        if (!extracted) {
            err |= _istream_type::failbit;
        }
        if (err) {
            is.setstate(err);
        }
        return is;
    }

    template<typename E, class T, class A, class S>
    inline std::basic_ostream<
        typename basic_kmstring<E, T, A, S>::value_type,
        typename basic_kmstring<E, T, A, S>::traits_type> &
    operator<<(
        std::basic_ostream<
            typename basic_kmstring<E, T, A, S>::value_type,
            typename basic_kmstring<E, T, A, S>::traits_type> &os,
        const basic_kmstring<E, T, A, S> &str) {
#ifdef _LIBCPP_VERSION
  typedef std::basic_ostream<
      typename basic_kmstring<E, T, A, S>::value_type,
      typename basic_kmstring<E, T, A, S>::traits_type>
      _ostream_type;
  typename _ostream_type::sentry _s(os);
  if (_s) {
    typedef std::ostreambuf_iterator<
        typename basic_kmstring<E, T, A, S>::value_type,
        typename basic_kmstring<E, T, A, S>::traits_type>
        _Ip;
    size_t __len = str.size();
    bool __left =
        (os.flags() & _ostream_type::adjustfield) == _ostream_type::left;
    if (__pad_and_output(
            _Ip(os),
            str.data(),
            __left ? str.data() + __len : str.data(),
            str.data() + __len,
            os,
            os.fill())
            .failed()) {
      os.setstate(_ostream_type::badbit | _ostream_type::failbit);
    }
  }
#elif defined(_MSC_VER)
  typedef decltype(os.precision()) streamsize;
  // MSVC doesn't define __ostream_insert
  os.write(str.data(), static_cast<streamsize>(str.size()));
#else
        std::__ostream_insert(os, str.data(), str.size());
#endif
        return os;
    }

    // basic_string compatibility routines

    template<typename E, class T, class A, class S, class A2>
    inline bool operator==(
        const basic_kmstring<E, T, A, S> &lhs,
        const std::basic_string<E, T, A2> &rhs) {
        return lhs.compare(0, lhs.size(), rhs.data(), rhs.size()) == 0;
    }

    template<typename E, class T, class A, class S, class A2>
    inline bool operator==(
        const std::basic_string<E, T, A2> &lhs,
        const basic_kmstring<E, T, A, S> &rhs) {
        return rhs == lhs;
    }

    template<typename E, class T, class A, class S, class A2>
    inline bool operator!=(
        const basic_kmstring<E, T, A, S> &lhs,
        const std::basic_string<E, T, A2> &rhs) {
        return !(lhs == rhs);
    }

    template<typename E, class T, class A, class S, class A2>
    inline bool operator!=(
        const std::basic_string<E, T, A2> &lhs,
        const basic_kmstring<E, T, A, S> &rhs) {
        return !(lhs == rhs);
    }

    template<typename E, class T, class A, class S, class A2>
    inline bool operator<(
        const basic_kmstring<E, T, A, S> &lhs,
        const std::basic_string<E, T, A2> &rhs) {
        return lhs.compare(0, lhs.size(), rhs.data(), rhs.size()) < 0;
    }

    template<typename E, class T, class A, class S, class A2>
    inline bool operator>(
        const basic_kmstring<E, T, A, S> &lhs,
        const std::basic_string<E, T, A2> &rhs) {
        return lhs.compare(0, lhs.size(), rhs.data(), rhs.size()) > 0;
    }

    template<typename E, class T, class A, class S, class A2>
    inline bool operator<(
        const std::basic_string<E, T, A2> &lhs,
        const basic_kmstring<E, T, A, S> &rhs) {
        return rhs > lhs;
    }

    template<typename E, class T, class A, class S, class A2>
    inline bool operator>(
        const std::basic_string<E, T, A2> &lhs,
        const basic_kmstring<E, T, A, S> &rhs) {
        return rhs < lhs;
    }

    template<typename E, class T, class A, class S, class A2>
    inline bool operator<=(
        const basic_kmstring<E, T, A, S> &lhs,
        const std::basic_string<E, T, A2> &rhs) {
        return !(lhs > rhs);
    }

    template<typename E, class T, class A, class S, class A2>
    inline bool operator>=(
        const basic_kmstring<E, T, A, S> &lhs,
        const std::basic_string<E, T, A2> &rhs) {
        return !(lhs < rhs);
    }

    template<typename E, class T, class A, class S, class A2>
    inline bool operator<=(
        const std::basic_string<E, T, A2> &lhs,
        const basic_kmstring<E, T, A, S> &rhs) {
        return !(lhs > rhs);
    }

    template<typename E, class T, class A, class S, class A2>
    inline bool operator>=(
        const std::basic_string<E, T, A2> &lhs,
        const basic_kmstring<E, T, A, S> &rhs) {
        return !(lhs < rhs);
    }

    using kmstring = basic_kmstring<char>;
    using u16kmstring = melon::basic_kmstring<char16_t>;

    // kmstring is relocatable
    template<class T, class R, class A, class S>
    MELON_ASSUME_RELOCATABLE(basic_kmstring<T, R, A, S>);

    // Compatibility function, to make sure toStdString(s) can be called
    // to convert a std::string or kmstring variable s into type std::string
    // with very little overhead if s was already std::string
    inline std::string toStdString(const melon::kmstring &s) {
        return std::string(s.data(), s.size());
    }

    inline const std::string &toStdString(const std::string &s) {
        return s;
    }

    // If called with a temporary, the compiler will select this overload instead
    // of the above, so we don't return a (lvalue) reference to a temporary.
    inline std::string &&toStdString(std::string &&s) {
        return std::move(s);
    }
} // namespace melon

// Hash functions to make kmstring usable with e.g. unordered_map

#define MELON_KMSTRING_HASH1(T)                                        \
  template <>                                                          \
  struct hash<::melon::basic_kmstring<T>> {                            \
    size_t operator()(const ::melon::basic_kmstring<T>& s) const {     \
      return ::melon::hash::fnv32_buf(s.data(), s.size() * sizeof(T)); \
    }                                                                  \
  };

// The C++11 standard says that these four are defined for basic_string
#define MELON_KMSTRING_HASH      \
  MELON_KMSTRING_HASH1(char)     \
  MELON_KMSTRING_HASH1(char16_t) \
  MELON_KMSTRING_HASH1(char32_t) \
  MELON_KMSTRING_HASH1(wchar_t)

namespace std {
    MELON_KMSTRING_HASH
} // namespace std

#undef MELON_KMSTRING_HASH
#undef MELON_KMSTRING_HASH1

MELON_POP_WARNING

#undef KMSTRING_DISABLE_SSO

namespace melon {
    template<class T>
    struct IsSomeString;

    template<>
    struct IsSomeString<kmstring> : std::true_type {
    };
} // namespace melon

template<>
struct fmt::formatter<melon::kmstring> : private formatter<fmt::string_view> {
    using formatter<fmt::string_view>::parse;

    template<typename Context>
    typename Context::iterator format(
        const melon::kmstring &s, Context &ctx) const {
        return formatter<fmt::string_view>::format({s.data(), s.size()}, ctx);
    }
};
